1 IMPORT

1.1 Import librairies

library(ggplot2)
library(cowplot)
library(tidyverse)
library(dplyr)
library(Seurat)
library(SCpubr)
library(patchwork)

setwd("~/Projects/HumanThymusProject/")
source("~/Projects/HumanThymusProject/scripts/colors_universal.R")

1.2 Import data

# integrated dataset
seur_integrated <- readRDS("./data_geo/seurat_objects/seurat_human_integrated_object_23_12_01.rds")
DimPlot(seur_integrated, reduction="umap_integrated", group.by="clusters_integrated_data", cols=cols_integrated)

# individual thymic seurat objects
seur_thym <- list(
  "nkt"=readRDS("./data_github/seurat_objects_thymus/seurat_thymus_inkt.rds"),
  "mait"=readRDS("./data_github/seurat_objects_thymus/seurat_thymus_mait.rds"),
  "gdt"=readRDS("./data_github/seurat_objects_thymus/seurat_thymus_gdt.rds")
)
# sanity check seurat objects correspond to Fig 2
DimPlot(seur_thym$nkt,  group.by="clusters_per_lineage", label=T, cols=cols_thym_nkt)

DimPlot(seur_thym$mait, group.by="clusters_per_lineage", label=T, cols=cols_thym_mait)

DimPlot(seur_thym$gdt,  group.by="clusters_per_lineage", label=T, cols=cols_thym_gdt)

2 COEXPRESSION TYPE 1/17 (Fig 2D)

2.1 Functions for plotting coexpression

The functions BlendMatrix, Melt, BlendMap and BlendExpression are taken from the Seurat package (these are hidden functions used when running FeaturePlot(..., blend=T)). The function PlotCoexpression is written in-house.

# Set color matrix
BlendMatrix <- function(
    n = 10,
    col.threshold = 0.5,
    two.colors = c("#ff0000", "#00ff00"),
    negative.color = "black"
) {
  if (0 > col.threshold || col.threshold > 1) {
    stop("col.threshold must be between 0 and 1")
  }
  C0 <- as.vector(col2rgb(negative.color, alpha = TRUE))
  C1 <- as.vector(col2rgb(two.colors[1], alpha = TRUE))
  C2 <- as.vector(col2rgb(two.colors[2], alpha = TRUE))
  blend_alpha <- (C1[4] + C2[4])/2
  C0 <- C0[-4]
  C1 <- C1[-4]
  C2 <- C2[-4]
  merge.weight <- min(255 / (C1 + C2 +  C0 + 0.01))
  sigmoid <- function(x) {
    return(1 / (1 + exp(-x)))
  }
  blend_color <- function(
    i,
    j,
    col.threshold,
    n,
    C0,
    C1,
    C2,
    alpha,
    merge.weight
  ) {
    c.min <- sigmoid(5 * (1 / n - col.threshold)) # 5*
    c.max <- sigmoid(5 * (1 - col.threshold)) # 5*
    c1_weight <- sigmoid(5 * (i / n - col.threshold)) # 5*
    c2_weight <- sigmoid(5 * (j / n - col.threshold)) # 5*
    c0_weight <-  sigmoid(5 * ((i + j) / (2 * n) - col.threshold)) # 5*
    c1_weight <- (c1_weight - c.min) / (c.max - c.min)
    c2_weight <- (c2_weight - c.min) / (c.max - c.min)
    c0_weight <- (c0_weight - c.min) / (c.max - c.min)
    C1_length <- sqrt(sum((C1 - C0) ** 2))
    C2_length <- sqrt(sum((C2 - C0) ** 2))
    C1_unit <- (C1 - C0) / C1_length
    C2_unit <- (C2 - C0) / C2_length
    C1_weight <- C1_unit * c1_weight
    C2_weight <- C2_unit * c2_weight
    C_blend <- C1_weight * (i - 1) * C1_length / (n - 1) + C2_weight * (j - 1) * C2_length / (n - 1) + (i - 1) * (j - 1) * c0_weight * C0 / (n - 1) ** 2 + C0
    C_blend[C_blend > 255] <- 255
    C_blend[C_blend < 0] <- 0
    return(rgb(
      red = C_blend[1],
      green = C_blend[2],
      blue = C_blend[3],
      alpha = alpha,
      maxColorValue = 255
    ))
  }
  blend_matrix <- matrix(nrow = n, ncol = n)
  for (i in 1:n) {
    for (j in 1:n) {
      blend_matrix[i, j] <- blend_color(
        i = i,
        j = j,
        col.threshold = col.threshold,
        n = n,
        C0 = C0,
        C1 = C1,
        C2 = C2,
        alpha = blend_alpha,
        merge.weight = merge.weight
      )
    }
  }
  return(blend_matrix)
}

# Plot color matrix
Melt <- function(x) {
  if (!is.data.frame(x = x)) {
    x <- as.data.frame(x = x)
  }
  return(data.frame(
    rows = rep.int(x = rownames(x = x), times = ncol(x = x)),
    cols = unlist(x = lapply(X = colnames(x = x), FUN = rep.int, times = nrow(x = x))),
    vals = unlist(x = x, use.names = FALSE)
  ))
}

BlendMap <- function(color.matrix, step=2, xtext='rows', ytext="cols") {
  color.heat <- matrix(
    data = 1:prod(dim(x = color.matrix)) - 1,
    nrow = nrow(x = color.matrix),
    ncol = ncol(x = color.matrix),
    dimnames = list(
      1:nrow(x = color.matrix),
      1:ncol(x = color.matrix)
    )
  )
  
  # xbreaks <- seq.int(from = 0, to = nrow(x = color.matrix), by = step)
  # ybreaks <- seq.int(from = 0, to = ncol(x = color.matrix), by = step)
  color.heat <- Melt(x = color.heat)
  color.heat$rows <- as.numeric(x = as.character(x = color.heat$rows))
  color.heat$cols <- as.numeric(x = as.character(x = color.heat$cols))
  color.heat$vals <- factor(x = color.heat$vals)
  plot <- ggplot(
    data = color.heat,
    mapping = aes_string(x = "rows", y = "cols", fill = 'vals')
  ) +
    geom_raster(show.legend = FALSE) +
    theme(plot.margin = unit(x = rep.int(x = 0, times = 4), units = 'cm')) +
    # scale_x_continuous(breaks = xbreaks, expand = c(0, 0), labels = xbreaks) +
    # scale_y_continuous(breaks = ybreaks, expand = c(0, 0), labels = ybreaks) +
    scale_fill_manual(values = as.vector(x = color.matrix)) +
    labs(x=xtext, y=ytext)+
    theme_cowplot()
  
  if(step!=0){
    xbreaks <- seq.int(from = 0, to = nrow(x = color.matrix), by = step)
    ybreaks <- seq.int(from = 0, to = ncol(x = color.matrix), by = step)
    plot <- plot +
      scale_x_continuous(breaks = xbreaks, expand = c(0, 0), labels = xbreaks) +
      scale_y_continuous(breaks = ybreaks, expand = c(0, 0), labels = ybreaks)
  } else if(step==0){
    plot <- plot+theme(axis.text=element_blank(), axis.ticks=element_blank())
  }
  
  return(plot)
}

# Normalize expression level of 2 features & return also a "blend" expression (corresponding to color matrix)
BlendExpression <- function(data, nlevels=100) {
  if (ncol(x = data) != 2) {
    stop("'BlendExpression' only blends two features")
  }
  features <- colnames(x = data)
  data <- as.data.frame(x = apply(
    X = data,
    MARGIN = 2,
    FUN = function(x) {
      return(round(x = (nlevels-1) * (x - min(x)) / (max(x) - min(x))))
    }
  ))
  data[, 3] <- data[, 1] + data[, 2] * nlevels
  # colnames(x = data) <- c(features, paste(features, collapse = '_'))
  colnames(x = data) <- c(features, "blend")
  for (i in 1:ncol(x = data)) {
    data[, i] <- factor(x = data[, i])
  }
  return(data)
}

PlotCoexpression <- function(seuratobj,
                             features,
                             reduction_name="umap",
                             plotting="blend",
                             pt_size=1,
                             set_negative_scores_to_0=F,
                             pwithmatrix=T,
                             matrix_coordinates=c(0.05, 0.06),
                             rasterdpi=300,
                             nlevels=100,
                             cols.neg="#969696", cols.pos=c("#74c476", "#fd8d3c"), col.threshold=0.5, colmatrix_stepsize=10,
                             order=T){
  # GET COLOR MATRIX
  # cat("\n-> Getting color matrix\n")
  color.matrix <- BlendMatrix(
    two.colors = cols.pos,
    col.threshold = col.threshold,
    negative.color = cols.neg,
    n=nlevels
  )
  
  # BLEND EXPRESSION
  # cat("\n-> Blending features expression\n")
  df <- seuratobj@meta.data[, features]
  if(set_negative_scores_to_0==T){
    df[df<0] <- 0
  }
  df <- BlendExpression(df, nlevels=nlevels) # 3 columns
  # head(df)
  # GET PLOTTING DATAFRAME
  # cat("\n-> Defining plotting DF\n")
  dims <- as.data.frame(seuratobj@reductions[[reduction_name]]@cell.embeddings)
  # head(dims)
  df_final <- merge(df, dims, by="row.names")
  rownames(df_final) <- df_final$Row.names
  df_final$Row.names <- NULL
  # print(tail(df_final))
  
  # PLOT
  if(plotting=="feature1"){
    # cat("\n-> Plotting feature 1\n")
    if(order==T){df_final <- df_final[order(df_final[,1]),]}
    df_final[,1] <- as.numeric(as.character(df_final[,1])) # transform factors to numbers for plotting
    p <- ggplot(df_final, aes_string(x=colnames(dims)[1], y=colnames(dims)[2], color=colnames(df_final)[1]))+
      geom_point(size=pt_size)+
      scale_color_gradient(low=color.matrix[1, 1], high=color.matrix[nrow(color.matrix), 1])
  }
  else if(plotting=="feature2"){
    # cat("\n-> Plotting feature 2\n")
    if(order==T){df_final <- df_final[order(df_final[,2]),]}
    df_final[,2] <- as.numeric(as.character(df_final[,2])) # transform factors to numbers for plotting
    p <- ggplot(df_final, aes_string(x=colnames(dims)[1], y=colnames(dims)[2], color=colnames(df_final)[2]))+
      geom_point(size=pt_size)+
      scale_color_gradient(low=color.matrix[1, 1], high=color.matrix[1, ncol(color.matrix)])
  }
  else if(plotting=="blend"){
    # cat("\n-> Plotting blended features\n")
    if(order==T){
      # order points by increasing value of score 1 + score 2 (coexpression)
      df_final <- df_final[order(as.numeric(as.character(df_final[,1]))+as.numeric(as.character(df_final[,2]))),] 
      # print(tail(df_final))
      }
    # Colors
    cols.use <- as.vector(color.matrix)
    names(cols.use) <- as.character(0:(length(cols.use)-1))
    # Plot
    p <- ggplot(df_final, aes_string(x=colnames(dims)[1], y=colnames(dims)[2], color=colnames(df_final)[3]))+
      geom_point(size=pt_size)+
      scale_color_manual(values=cols.use)+
      labs(x="UMAP1", y="UMAP2")+
      theme_cowplot()+
      theme(legend.position="none",
            axis.text=element_blank(),
            axis.title=element_blank(),
            axis.ticks = element_blank(),
            axis.line = element_blank(),
            panel.border=element_rect(color="white", fill=NA, size=1))
    # rasterise to avoid computer crashing
    p <- ggrastr::rasterise(p, layers="Point", dpi=rasterdpi)
    # add color matrix if specified
    if(pwithmatrix==T){
      # cat("\n-> Adding color matrix on plot\n")
      p <- ggdraw(p)+
        draw_plot(
          BlendMap(color.matrix, step=colmatrix_stepsize, xtext=features[1], ytext=features[2]),
          x=matrix_coordinates[1],y=matrix_coordinates[2],.25,.25
          )
    }
  }
  
  return(p)
}

2.2 Score gene signatures

We will score the type 1 and type 17 signatures on our integrated dataset, as with more cells (from thymus & blood) it will provide a better “background” for the choice of control genes. The type 1 and 17 gene signatures are inspired from previous publications such as Garner et al. and Bugaut et al., in addition to various publications on murine thymic Tinn development which define iNKT1/MAIT1/GD1 or iNKT17/MAIT17/GD17 gene signatures.

gene_signatures <- list(
  "type1"=c("EOMES", "GZMK", "CCL5", "TBX21", "NKG7", "GZMA", "PRF1", "IFNG", "KLRD1", "KLRC1", "SLAMF7", "XCL1", "IL2RB"),
  "type17"=c("RORC", "RORA", "CCR6", "IL23R", "BLK", "SCART1", "IL1R1", "ITGAE", "SERPINB1", "IL7R", "IL17RE")
)

# score
seur_integrated <- AddModuleScore(seur_integrated, features=gene_signatures, name=names(gene_signatures), seed=1)
colnames(seur_integrated@meta.data)[32:33] <- names(gene_signatures)

# add the type 1 and type 17 scored on integrated object in the individual thymic objects
seur_thym$nkt@meta.data[,c("score_type1", "score_type17")] <- seur_integrated@meta.data[rownames(seur_integrated@meta.data) %in% rownames(seur_thym$nkt@meta.data),c("type1", "type17")]
seur_thym$mait@meta.data[,c("score_type1", "score_type17")] <- seur_integrated@meta.data[rownames(seur_integrated@meta.data) %in% rownames(seur_thym$mait@meta.data),c("type1", "type17")]
seur_thym$gdt@meta.data[,c("score_type1", "score_type17")] <- seur_integrated@meta.data[rownames(seur_integrated@meta.data) %in% rownames(seur_thym$gdt@meta.data),c("type1", "type17")]

2.3 Plot coexpression of type 1/17 on scatter plot

# make dataframe
df <- rbind(
  as.data.frame(seur_thym$nkt@meta.data) %>%
    select(score_type1, score_type17, clusters_per_lineage) %>%
    mutate(tcell_lineage="iNKT"),
  as.data.frame(seur_thym$mait@meta.data) %>%
    select(score_type1, score_type17, clusters_per_lineage) %>%
    mutate(tcell_lineage="MAIT"),
  as.data.frame(seur_thym$gdt@meta.data) %>%
    select(score_type1, score_type17, clusters_per_lineage) %>%
    mutate(tcell_lineage="GD")
)

summary(df)
##   score_type1       score_type17      clusters_per_lineage tcell_lineage     
##  Min.   :-0.8019   Min.   :-0.38516   Length:10166         Length:10166      
##  1st Qu.:-0.5682   1st Qu.:-0.15923   Class :character     Class :character  
##  Median :-0.4890   Median : 0.03684   Mode  :character     Mode  :character  
##  Mean   :-0.4282   Mean   : 0.03396                                          
##  3rd Qu.:-0.4085   3rd Qu.: 0.15623                                          
##  Max.   : 1.6457   Max.   : 1.24695
ggplot(
  # df %>% filter(cluster_per_lineage %in% c("NKT_c5", "NKT_c6", "MAIT_c6", "GDT_c7")),
  df,
  aes(x=score_type1, y=score_type17, color=clusters_per_lineage)
)+
  geom_point()+
  facet_wrap(~factor(tcell_lineage, levels=c("iNKT", "MAIT", "GD")), nrow=1)+
  scale_color_manual(values=c(cols_thym_nkt, cols_thym_mait, cols_thym_gdt))+
  labs(x="Type 1 signature", y="Type 17 signature")+
  theme_cowplot()

2.4 Plot coexpression on UMAP (Fig 2D)

# iNKT
PlotCoexpression(
  seuratobj = seur_thym$nkt,
  features = c("score_type1", "score_type17"),
  set_negative_scores_to_0 = T, # whether to have negative signature scores set to 0 (negative scores mean that signature genes are expressed less than control genes)
  plotting = "blend",
  pt_size=2,
  matrix_coordinates = c(0, 0), # where to place the color matrix on plot
  cols.neg="#bdbdbd", # negative color
  cols.pos=c("red", "blue"),
  col.threshold=0.1, # at which threshold to switch from grey to color
  colmatrix_stepsize=100, # how many steps in the color matrix
  order=T # whether to order cells
)

# MAIT
PlotCoexpression(
  seuratobj = seur_thym$mait,
  features = c("score_type1", "score_type17"),
  set_negative_scores_to_0 = T,
  plotting = "blend",
  matrix_coordinates = c(0.7, 0.06),
  cols.neg="#bdbdbd",
  cols.pos=c("red", "blue"),
  pt_size=2,
  col.threshold=0.1,
  colmatrix_stepsize=100,
  order=T
)

# GD
PlotCoexpression(
  seuratobj = seur_thym$gdt,
  features = c("score_type1", "score_type17"),
  set_negative_scores_to_0 = T,
  plotting = "blend",
  matrix_coordinates = c(0, 0),
  cols.neg="#bdbdbd",
  cols.pos=c("red", "blue"),
  pt_size=2,
  col.threshold=0.1,
  colmatrix_stepsize=100,
  order=T
)

3 EXPRESSION OF EFFECTOR GENES (Fig 2E)

3.1 Function for plotting density of expression

plot_genes_nebulosa <- function(seuratobj, genes_vector, pgrid_size=c(40,5)){
  
  pnebulosa <- list()
  for(gene in genes_vector) {
    p <- NULL
    p <- SCpubr::do_NebulosaPlot(seuratobj, features = gene, pt.size=5, border.size=0.5)
    # p <- ggrastr::rasterise(p, layers="Point", dpi=200)
    pnebulosa[[gene]] <- p
  }
  pgrid <- plot_grid(plotlist=pnebulosa, nrow=2)
  
  return(pgrid)
}

3.2 Plot genes of interest (Fig 2E)

genes_of_interest <- c("ZBTB16", "CCR9", "CCR7", "KLRB1", "EOMES", "GZMK", "CCR6", "RORA")

plot_genes_nebulosa(seur_thym$nkt, genes_vector = genes_of_interest)

plot_genes_nebulosa(seur_thym$mait, genes_vector = genes_of_interest)

plot_genes_nebulosa(seur_thym$gdt, genes_vector = genes_of_interest)

4 SESSION INFO

sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.1.2    SCpubr_2.0.2       SeuratObject_4.1.3 Seurat_4.3.0.1    
##  [5] forcats_1.0.0      stringr_1.5.0      dplyr_1.1.2        purrr_1.0.1       
##  [9] readr_2.1.4        tidyr_1.3.0        tibble_3.2.1       tidyverse_1.3.2   
## [13] cowplot_1.1.1      ggplot2_3.4.2     
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.3                  spatstat.explore_3.2-1     
##   [3] reticulate_1.30             ks_1.14.0                  
##   [5] tidyselect_1.2.0            htmlwidgets_1.6.2          
##   [7] grid_4.1.3                  Rtsne_0.16                 
##   [9] munsell_0.5.0               codetools_0.2-19           
##  [11] ica_1.0-3                   future_1.33.0              
##  [13] miniUI_0.1.1.1              withr_2.5.0                
##  [15] spatstat.random_3.1-5       colorspace_2.1-0           
##  [17] progressr_0.13.0            Biobase_2.54.0             
##  [19] highr_0.10                  knitr_1.43                 
##  [21] rstudioapi_0.14             stats4_4.1.3               
##  [23] SingleCellExperiment_1.16.0 ROCR_1.0-11                
##  [25] tensor_1.5                  listenv_0.9.0              
##  [27] MatrixGenerics_1.6.0        labeling_0.4.2             
##  [29] GenomeInfoDbData_1.2.7      polyclip_1.10-4            
##  [31] farver_2.1.1                Nebulosa_1.4.0             
##  [33] parallelly_1.36.0           vctrs_0.6.3                
##  [35] generics_0.1.3              xfun_0.39                  
##  [37] timechange_0.2.0            R6_2.5.1                   
##  [39] GenomeInfoDb_1.30.1         ggbeeswarm_0.7.2           
##  [41] bitops_1.0-7                spatstat.utils_3.0-3       
##  [43] cachem_1.0.8                DelayedArray_0.20.0        
##  [45] assertthat_0.2.1            promises_1.2.0.1           
##  [47] scales_1.2.1                googlesheets4_1.1.1        
##  [49] beeswarm_0.4.0              gtable_0.3.3               
##  [51] Cairo_1.6-0                 globals_0.16.2             
##  [53] goftest_1.2-3               rlang_1.1.1                
##  [55] splines_4.1.3               lazyeval_0.2.2             
##  [57] gargle_1.5.1                spatstat.geom_3.2-1        
##  [59] broom_1.0.5                 yaml_2.3.7                 
##  [61] reshape2_1.4.4              abind_1.4-5                
##  [63] modelr_0.1.11               backports_1.4.1            
##  [65] httpuv_1.6.11               tools_4.1.3                
##  [67] ellipsis_0.3.2              jquerylib_0.1.4            
##  [69] RColorBrewer_1.1-3          BiocGenerics_0.40.0        
##  [71] ggridges_0.5.4              Rcpp_1.0.10                
##  [73] plyr_1.8.8                  zlibbioc_1.40.0            
##  [75] RCurl_1.98-1.12             deldir_1.0-9               
##  [77] pbapply_1.7-2               viridis_0.6.3              
##  [79] S4Vectors_0.32.4            zoo_1.8-12                 
##  [81] SummarizedExperiment_1.24.0 haven_2.5.3                
##  [83] ggrepel_0.9.3               cluster_2.1.4              
##  [85] fs_1.6.2                    magrittr_2.0.3             
##  [87] data.table_1.14.8           scattermore_1.2            
##  [89] lmtest_0.9-40               reprex_2.0.2               
##  [91] RANN_2.6.1                  googledrive_2.1.1          
##  [93] mvtnorm_1.2-2               fitdistrplus_1.1-11        
##  [95] matrixStats_1.0.0           hms_1.1.3                  
##  [97] mime_0.12                   evaluate_0.21              
##  [99] xtable_1.8-4                mclust_6.0.0               
## [101] readxl_1.4.2                IRanges_2.28.0             
## [103] gridExtra_2.3               compiler_4.1.3             
## [105] KernSmooth_2.23-21          crayon_1.5.2               
## [107] htmltools_0.5.5             later_1.3.1                
## [109] tzdb_0.4.0                  lubridate_1.9.2            
## [111] DBI_1.1.3                   dbplyr_2.3.2               
## [113] MASS_7.3-60                 Matrix_1.5-4.1             
## [115] cli_3.6.1                   parallel_4.1.3             
## [117] igraph_1.5.0                GenomicRanges_1.46.1       
## [119] pkgconfig_2.0.3             sp_2.0-0                   
## [121] plotly_4.10.2               spatstat.sparse_3.0-2      
## [123] xml2_1.3.4                  vipor_0.4.5                
## [125] bslib_0.5.0                 XVector_0.34.0             
## [127] rvest_1.0.3                 digest_0.6.32              
## [129] sctransform_0.3.5           RcppAnnoy_0.0.21           
## [131] pracma_2.4.2                spatstat.data_3.0-1        
## [133] rmarkdown_2.23              cellranger_1.1.0           
## [135] leiden_0.4.3                uwot_0.1.16                
## [137] shiny_1.7.4                 lifecycle_1.0.3            
## [139] nlme_3.1-162                jsonlite_1.8.7             
## [141] viridisLite_0.4.2           fansi_1.0.4                
## [143] pillar_1.9.0                lattice_0.21-8             
## [145] ggrastr_1.0.2               fastmap_1.1.1              
## [147] httr_1.4.6                  survival_3.5-5             
## [149] glue_1.6.2                  png_0.1-8                  
## [151] stringi_1.7.12              sass_0.4.6                 
## [153] irlba_2.3.5.1               future.apply_1.11.0